Chris Bail
Duke University
website: https://www.chrisbail.net
github: https://github.com/cbail
Twitter: https://www.twitter.com/chris_bail
1) Take a moment to see whether you can make an API call that returns other types of information about yourself or someone else.
2) What type of data can you get access to?
3) What type of data can you not access?
Navigate to:
# Install and attach rtweet, then register an API token for this session.
install.packages("rtweet")
library(rtweet)

# Placeholders: replace each string with the values from your own Twitter app.
app_name <- "YOURAPPNAMEHERE"
consumer_key <- "YOURKEYHERE"
consumer_secret <- "YOURSECRETHERE"

# NOTE(review): set_renv = TRUE is understood to make rtweet remember the token
# for later sessions -- confirm against the installed rtweet version.
create_token(
  app = app_name,
  consumer_key = consumer_key,
  consumer_secret = consumer_secret,
  set_renv = TRUE
)
# Search for up to 3000 tweets containing #Korea, leaving retweets out.
korea_tweets <- search_tweets(q = "#Korea", n = 3000, include_rts = FALSE)

# Inspect the available columns and the first few tweet texts.
names(korea_tweets)
head(korea_tweets$text)

# Plot tweet counts aggregated into three-hour bins. Labels are added before
# the theme layers; the two are independent, so the rendered plot is the same.
ts_plot(korea_tweets, by = "3 hours") +
  ggplot2::labs(
    title = "Frequency of Tweets about Korea from the Past Day",
    subtitle = "Twitter status (tweet) counts aggregated using three-hour intervals",
    caption = "\nSource: Data collected from Twitter's REST API via rtweet",
    x = NULL,
    y = NULL
  ) +
  ggplot2::theme_minimal() +
  ggplot2::theme(plot.title = ggplot2::element_text(face = "bold"))
# Search recent English-language tweets that mention "korea" and fall inside
# the bounding area returned by lookup_coords("usa"), then map them.
#
# The language filter is part of the query string. Passed as a separate
# positional argument (as it was before), "lang:en" is not appended to the
# query; R matches it to whichever search_tweets() parameter is next in line.
nk_tweets <- search_tweets(
  "korea lang:en",
  geocode = lookup_coords("usa"),
  n = 1000, type = "recent", include_rts = FALSE
)

# lat_lng() supplies the `lat` and `lng` columns used for plotting below.
geocoded <- lat_lng(nk_tweets)

# Remove plot margins so the map fills the device.
par(mar = c(0, 0, 0, 0))
maps::map("state", lwd = .25)

# Overlay one semi-transparent blue point per geocoded tweet.
with(
  geocoded,
  points(lng, lat, pch = 20, cex = .75, col = rgb(0, .3, .7, .75))
)
# --- Timeline: the five most recent tweets from one account ---
sanders_tweets <- get_timelines("sensanders", n = 5)
head(sanders_tweets$text)

# --- Profile metadata for the same account ---
sanders_twitter_profile <- lookup_users("sensanders")
sanders_twitter_profile$description
sanders_twitter_profile$location
sanders_twitter_profile$followers_count

# --- Five tweets the account has favorited ---
sanders_favorites <- get_favorites("sensanders", n = 5)
sanders_favorites$text

# --- Followers of the account ---
sanders_follows <- get_followers("sensanders")

# --- Current API rate limits (first four columns only) ---
rate_limits <- rate_limit()
head(rate_limits[, 1:4])

# --- Trending topics for a named location ---
get_trends("New York")

# --- NOTE: this posts a status from the authenticated account; run it
# deliberately ---
post_tweet("I love APIs")
1) Collect the most recent 100 tweets from CNN; 2) determine how many people follow CNN on Twitter; and 3) determine whether CNN is currently tweeting about any subjects that are trending in your hometown.
# Load the list of Twitter handles for elected officials from the hosted CSV,
# keeping text columns as character vectors rather than factors.
elected_officials <- read.csv(
  file = "https://cbail.github.io/Elected_Officials_Twitter_Handles.csv",
  stringsAsFactors = FALSE
)

# Preview the first six rows.
head(elected_officials, n = 6L)
                  name    screen_name
1   Sen Luther Strange SenatorStrange
2    Rep. Mike Johnson RepMikeJohnson
3             Ted Budd     RepTedBudd
4    Adriano Espaillat   RepEspaillat
5 Rep. Blunt Rochester  RepBRochester
6  Nanette D. Barragán    RepBarragan
# Collect up to 100 recent tweets for every handle in `elected_officials`.
#
# Each timeline is stored in a pre-allocated list and the pieces are combined
# once after the loop; calling rbind() inside the loop re-copies every row
# gathered so far on each iteration. If the loop is interrupted, the timelines
# fetched up to that point are still available in `timelines`.

# create empty container to store tweets for each elected official
elected_official_tweets <- as.data.frame(NULL)
timelines <- vector("list", nrow(elected_officials))

# seq_len() gives an empty sequence for zero rows, whereas 1:nrow() would
# count down (1, 0) and request handles that do not exist.
for (i in seq_len(nrow(elected_officials))) {
  # first, check rate limits
  rate_limits <- rate_limit()
  limit <- rate_limits[rate_limits$query == "statuses/user_timeline", ]
  # isTRUE() keeps the check from erroring when the endpoint is absent from
  # the rate-limit table (zero rows) or the remaining count is NA.
  if (isTRUE(limit$remaining[1] == 0)) {
    # no calls left: wait 15 minutes before continuing
    Sys.sleep(15 * 60)
  }
  # pull tweets
  tweets <- get_timeline(elected_officials$screen_name[i], n = 100)
  # store this official's tweets; assigning list(tweets) keeps the slot even
  # if the result is NULL
  timelines[i] <- list(tweets)
  # pause for one second to further prevent rate limiting
  Sys.sleep(1)
  # print number/iteration for debugging/monitoring progress
  print(i)
}

# populate dataframe: bind all timelines in a single step. do.call() returns
# NULL when there is nothing to bind, in which case the empty container stays.
combined_timelines <- do.call(rbind, timelines)
if (!is.null(combined_timelines)) {
  elected_official_tweets <- combined_timelines
}
# Install and attach Rfacebook for access to the Facebook Graph API.
install.packages("Rfacebook")
library(Rfacebook)

# Placeholder: replace with your own access token.
token <- "INSERTYOURNUMBERHERE"

# Look up the account that owns the token.
getUsers(users = "me", token = token)

# Pages liked by that account.
my_likes <- getLikes(user = "me", token = token)

# Posts from the "DukeUniv" page.
duke_fb <- getPage(page = "DukeUniv", token = token)
1) Find out which organization has more Facebook likes: CNN or the New York Times.
2) For each organization, determine which of its 100 most recent posts has received the most “likes.”
Here are a few: RgoogleMaps, Rfacebook, rOpenSci (this one combines many different APIs, e.g., the Internet Archive), WDI, rOpenGov, rtimes
Many more are available but are not yet on CRAN (install them from GitHub using devtools)
For example, visualization (plotly)